* This file: .\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do

* Regression discontinuities - build data-set
* Appends the quarterly loan files into one loan-level data set, cleans the
* main covariates, and saves ./iversen-rehm_cps2021/historical_data_all_full.dta.
cd D:\E\freddiemac\april2021\full

// cumulative data-set, for monthly files
* fs is a user-written command (SSC), not official Stata; it leaves the list
* of matching file names in r(files), which append then consumes.
clear all
fs historical_data_????Q?.dta
append using `r(files)', force

tab year
* month = last two digits of dt_first_pi (assumes a YYYYMM number -- TODO confirm)
gen month=real(substr(string(dt_first_pi),-2,2))
drop if month==.
drop if year==.
tab year

* First two digits of the five-digit ZIP code. zipcode is numeric, so a
* leading zero is lost (02100 is stored as 2100). Integer division returns
* the correct prefix (2); taking the first two characters of the string
* representation would have returned 21 for such codes.
gen zip2=floor(zipcode/1000) if zipcode>=1000
	* range of zip-codes in US: 00001 – 99950

sum fico mi_pct cnt_units occpy_sts ltv

* Set out-of-range or unknown codes to missing
replace fico=. if !inrange(fico,300,850)
replace mi_pct=. if !inrange(mi_pct,0,55)
replace cnt_units=. if !inrange(cnt_units,1,4)
replace occpy_sts="" if !inlist(occpy_sts,"P","I","S")
replace ltv=. if !inrange(ltv,6,200)
* Blank the state code for PR, VI and GU (the loans themselves are kept here)
replace st="" if inlist(st,"PR","VI","GU") 

* Occupancy dummies occ_P, occ_I, occ_S; missing when occpy_sts is blank
foreach v in P I S {
	gen occ_`v'=(occpy_sts=="`v'") if inlist(occpy_sts,"P","I","S")
}
sum fico mi_pct cnt_units occpy_sts ltv zip2 occ_P occ_I occ_S

* Monthly date, displayed as CCYY-NN
gen myear = ym(year, month) 
format %tmCCYY-NN myear

* post = 1 from January 2008 onwards; tm(2008m1) equals 576
gen post=(myear>=tm(2008m1))

*iebaltab fico mi_pct cnt_units ltv occ_P occ_I occ_S zip2, grpvar(post) save(balance_historical_data_all_full.xls)

* Two-digit FICO bin: first two digits of the (three-digit) score
gen fico_2d=real(substr(string(fico),1,2))

note: Created on `= c(current_date)'
note: C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do
compress
pwd
* replace: without it a rerun stops with r(602) once the file exists
save ./iversen-rehm_cps2021/historical_data_all_full.dta, replace

// Sample in article
* Restricts the loan-level data currently in memory to the analysis sample
* and saves ./iversen-rehm_cps2021/historical_data_all_sample.dta.
* use historical_data_all_full.dta, clear
tab loan_purpose
	* P = Purchase
	* C = Refinance - Cash Out
	* N = Refinance - No Cash Out
	* R = Refinance - Not Specified
	* 9 = Not Available

keep if inrange(fico,620,850)
keep if orig_loan_term==360 // 30-year mortgage
keep if ltv<=80
keep if mi_pct==0 // no mortgage insurance
keep if cnt_units==1 // single units
keep if occpy_sts=="P" // P = Primary Residence
* NOTE(review): when this section runs on the data left in memory by the
* build step above, st has already been blanked for PR, VI and GU, so this
* drop matches nothing and those loans stay in the sample with a missing
* state -- confirm whether they were meant to be excluded.
drop if inlist(st,"PR","VI","GU")
encode st, gen(state)

tab loan_purpose
	* P = Purchase
	* C = Refinance - Cash Out
	* N = Refinance - No Cash Out
	* R = Refinance - Not Specified
	* 9 = Not Available

tab fico_2d

note: Created on `= c(current_date)'
note: C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do
compress
pwd
* replace: without it a rerun stops with r(602) once the file exists
save ./iversen-rehm_cps2021/historical_data_all_sample.dta, replace


// Balance in sub-sample, by FICO-2d
/*
cd D:\E\freddiemac\april2021\full
use historical_data_all_sample.dta, clear
cd C:\Dropbox\Rehm\iversen\replication\Ch5\mortgages_usa\balance_tests/
levelsof fico_2d, local(F)
foreach f of local F {
	di "`f'"
	iebaltab fico mi_pct ltv zip2 if fico_2d==`f', grpvar(post) save(balance_historical_data_all_sample_fico_`f'.xls)
}
*/

// Collapse at year-month-level, full sample
* Loads the loan-level master file, computes interest-rate dispersion within
* each year-month cell, collapses to one row per year-month, and saves
* ./mortgages_usa/year_month_full.dta in Stata 12 format.
cd D:\E\freddiemac\april2021\full\iversen-rehm_cps2021/
use historical_data_all_full.dta, clear
cd C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6
	
	* dispersion measures at year-month-level
	* NOTE(review): gini() is not an official egen function; presumably it
	* comes from a user-written package (e.g. egen_inequal on SSC) -- confirm
	local b "year month"
	bys `b': egen gini=gini(int_rt)

	* M = cell mean of int_rt; dev1/dev2 = absolute deviation from M,
	* relative to the loan's own rate (dev1) or to M (dev2)
	cap drop M dev?
	bys `b': egen M=mean(int_rt)
	gen dev1=abs((int_rt-M)/int_rt)
	gen dev2=abs((int_rt-M)/M)

	* gini and M are constant within a cell, so their mean is the cell value
	collapse (min) fico_min=fico (max) fico_max=fico (p50) fico_p50=fico ///
		(mean) my=int_rt gini dev1 dev2 M  fico_mean=fico (sd) sd=int_rt (count) N=int_rt ///
		, by(`b')

	* coefficient of variation and Gini in percent; exact zeros become missing
	cap drop cv
	gen cv=(sd/my)*100
	replace gini=gini*100
	recode gini cv (0=.)

	label var my "Interest rates: Mean"
	label var cv "Interest rates: Coefficient of variation"
	label var sd "Interest rates: Standard deviation"
	label var gini "Interest rates: Gini"
	label var dev1 "abs((int_rt-M)/int_rt)"
	label var dev2 "abs((int_rt-M)/M)"

	* consecutive period counter
	cap drop time
	egen time=group(year month), label

	* monthly date, displayed as CCYY-NN
	gen myear = ym(year, month) 
	format %tmCCYY-NN myear
	
	* one row per year-month is expected after the collapse
	isid year month

	note: Created on `= c(current_date)'
	note: C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do
	compress
	pwd
	* replace: without it a rerun stops with r(602) once the file exists
	saveold ./mortgages_usa/year_month_full.dta, replace version(12)

******
* * In preparation for Figure 3:
* Monthly share (in percent) of mortgages with a FICO score below 620,
* saved as ./mortgages_usa/year_month_full_fico620.dta in Stata 12 format.
******

cd D:\E\freddiemac\april2021\full\iversen-rehm_cps2021/
use historical_data_all_full.dta, clear
cd C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6

	* Indicator for FICO below 620. The condition on !missing(fico) matters:
	* a bare (fico<620) evaluates to 0 for missing scores, which would count
	* loans with unknown FICO in the denominator as if they were at or above 620.
	cap drop fico620
	gen fico620=(fico<620) if !missing(fico)
	tabstat fico620, by(post) f(%4.3f)
	tabstat fico620, by(year) f(%4.3f)
	collapse (mean) fico620, by(myear year month)
	
	* share -> percent
	replace fico620=fico620*100
	label var fico620 "% of mortgages with FICO<620"
	
	compress
	note: Created on `= c(current_date)'
	note: C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do
	pwd
	* replace: without it a rerun stops with r(602) once the file exists
	saveold ./mortgages_usa/year_month_full_fico620.dta, replace version(12)


***********************************
* Generate data-set at the fico2d-time level
* Time: month, trimester, quarter, half, year
***********************************

	***********************************
	// fico2d-month
	* Collapses the article sample to FICO-bin x year-month cells (dispersion
	* measures plus cell means of covariates) and saves
	* ./mortgages_usa/year_month_sample_fico2d.dta in Stata 12 format.
	***********************************

	cd D:\E\freddiemac\april2021\full\iversen-rehm_cps2021/
	use historical_data_all_sample.dta, clear
	cd C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6

	keep if year<=2019
	drop if fico_2d>=82 // groups are too small
	tab year

	// Potential covariates
	* loan purpose dummies (missing when the purpose code is "9")
	foreach v in P C N R {
		gen purpose_`v'=(loan_purpose=="`v'") if loan_purpose!="9"
	}

	* origination channel dummies (missing when the channel code is "9")
	foreach v in B C R T {
		gen channel_`v'=(channel=="`v'") if channel!="9"
	}

	* property type dummies (missing when the type code is "99")
	foreach v in CO CP MH PU SF {
		gen proptype_`v'=(prop_type=="`v'") if prop_type!="99"
	}

	* 1 = two borrowers, 0 = one borrower, missing otherwise
	gen borrowers=(cnt_borr==2) if inlist(cnt_borr,1,2)

	* out-of-range ratios become missing
	d cltv dti ltv 
	sum cltv dti ltv 
	replace cltv=. if !inrange(cltv,6,200)
	replace dti=. if !inrange(dti,0,65) 
	replace ltv=. if !inrange(ltv,6,80)

		* dispersion measures at year-month-fico2d-level
		* NOTE(review): gini() is not an official egen function; presumably it
		* comes from a user-written package (e.g. egen_inequal on SSC) -- confirm
		local b "year month fico_2d"
		bys `b': egen gini=gini(int_rt)

		* M = cell mean of int_rt; dev1/dev2 = absolute deviation from M,
		* relative to the loan's own rate (dev1) or to M (dev2)
		cap drop M dev?
		bys `b': egen M=mean(int_rt)
		gen dev1=abs((int_rt-M)/int_rt)
		gen dev2=abs((int_rt-M)/M)

		* gini and M are constant within a cell, so their mean is the cell value
		collapse (min) fico_min=fico (max) fico_max=fico (p50) fico_p50=fico ///
			(mean) my=int_rt gini dev1 dev2 M  fico_mean=fico ///
				purpose_* channel_* proptype_* borrowers cltv dti ltv ///
			(sd) sd=int_rt (count) N=int_rt ///
			, by(`b')

		* coefficient of variation and Gini in percent; exact zeros become missing
		cap drop cv
		gen cv=(sd/my)*100
		replace gini=gini*100
		recode gini cv (0=.)

		label var my "Interest rates: Mean"
		label var cv "Interest rates: Coefficient of variation"
		label var sd "Interest rates: Standard deviation"
		label var gini "Interest rates: Gini"
		label var dev1 "abs((int_rt-M)/int_rt)"
		label var dev2 "abs((int_rt-M)/M)"

		label var purpose_P "Loan purpose: Purchase"
		label var purpose_C "Loan purpose: Refinance - Cash Out"
		label var purpose_N "Loan purpose: Refinance - No Cash Out"
		label var purpose_R "Loan purpose: Refinance - Not Specified"
		*label var loan_9 "Loan purpose: Not Available"

		label var proptype_CO "Condo"
		label var proptype_PU "PUD"
		label var proptype_MH "Manufactured Housing"
		label var proptype_SF "Single-Family"
		label var proptype_CP "Co-op"

		label var channel_R "Retail"
		label var channel_B "Broker"
		label var channel_C "Correspondent"
		label var channel_T "TPO Not Specified"
		
		label var borrowers "Share of two vs. one borrowers"
		
		label var cltv "Original combined loan-to-value (CLTV)"
		label var dti  "Original debt-to-income (DTI) ratio"
		label var ltv  "Original loan-to-value (LTV)"

		* consecutive period counter
		cap drop time
		egen time=group(year month), label

		* monthly date, displayed as CCYY-NN; panel = FICO bin x month
		gen myear = ym(year, month) 
		format %tmCCYY-NN myear
		
		xtset fico_2d myear 
		
		* value labels for the FICO bins: 62 -> "620-629", ..., 82 -> "820-829";
		* the top bin 83 is labelled "830-850"
		forvalues k=62/82 {
			label define fico_2d `k' "`k'0-`k'9", modify
		}
		label define fico_2d 83 "830-850", modify
		label val  fico_2d fico_2d 

		note: Created on `= c(current_date)'
		note: C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do
		compress
		pwd
		* replace: without it a rerun stops with r(602) once the file exists
		saveold ./mortgages_usa/year_month_sample_fico2d.dta, replace version(12)
		

	***********************************
	// fico2d-quarter
	* Collapses the article sample to FICO-bin x year-quarter cells and saves
	* ./mortgages_usa/year_quarter_sample_fico2d.dta in Stata 12 format.
	***********************************

	cd D:\E\freddiemac\april2021\full\iversen-rehm_cps2021/
	use historical_data_all_sample.dta, clear
	cd C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\

	keep if year<=2019
	drop if fico_2d>=82 // groups are too small
	tab year
		
		* quarter of the year (1-4) from the calendar month
		cap drop quarter
		recode month (1/3=1) (4/6=2) (7/9=3) (10/12=4), gen(quarter)

		* dispersion measures at year-quarter-level
		* NOTE(review): gini() is not an official egen function; presumably it
		* comes from a user-written package (e.g. egen_inequal on SSC) -- confirm
		local b "year quarter fico_2d"
		bys `b': egen gini=gini(int_rt)

		* M = cell mean of int_rt; dev1/dev2 = absolute deviation from M,
		* relative to the loan's own rate (dev1) or to M (dev2)
		cap drop M dev?
		bys `b': egen M=mean(int_rt)
		gen dev1=abs((int_rt-M)/int_rt)
		gen dev2=abs((int_rt-M)/M)

		* gini and M are constant within a cell, so their mean is the cell value
		collapse (min) fico_min=fico (max) fico_max=fico (p50) fico_p50=fico ///
			(mean) my=int_rt gini dev1 dev2 M  fico_mean=fico (sd) sd=int_rt (count) N=int_rt ///
			, by(`b')

		* coefficient of variation and Gini in percent; exact zeros become missing
		cap drop cv
		gen cv=(sd/my)*100
		replace gini=gini*100
		recode gini cv (0=.)

		label var my "Interest rates: Mean"
		label var cv "Interest rates: Coefficient of variation"
		label var sd "Interest rates: Standard deviation"
		label var gini "Interest rates: Gini"
		label var dev1 "abs((int_rt-M)/int_rt)"
		label var dev2 "abs((int_rt-M)/M)"

		* consecutive period counter, used as the panel time variable
		cap drop time
		egen time=group(year quarter), label

		xtset fico_2d time
		
		* value labels for the FICO bins: 62 -> "620-629", ..., 82 -> "820-829";
		* the top bin 83 is labelled "830-850"
		forvalues k=62/82 {
			label define fico_2d `k' "`k'0-`k'9", modify
		}
		label define fico_2d 83 "830-850", modify
		label val fico_2d fico_2d 

		note: Created on `= c(current_date)'
		note: C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do
		compress
		pwd
		* replace: without it a rerun stops with r(602) once the file exists
		saveold ./mortgages_usa/year_quarter_sample_fico2d.dta, replace version(12)

	***********************************
	// fico2d-trimester
	* Collapses the article sample to FICO-bin x year-trimester cells and saves
	* ./mortgages_usa/year_trimester_sample_fico2d.dta in Stata 12 format.
	***********************************

	cd D:\E\freddiemac\april2021\full\iversen-rehm_cps2021/
	use historical_data_all_sample.dta, clear
	cd C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\

	keep if year<=2019
	drop if fico_2d>=82 // groups are too small
	tab year

		* four-month period of the year (1-3) from the calendar month
		cap drop trimester
		recode month (1/4=1) (5/8=2) (9/12=3), gen(trimester)

		* dispersion measures at year-trimester-level
		* NOTE(review): gini() is not an official egen function; presumably it
		* comes from a user-written package (e.g. egen_inequal on SSC) -- confirm
		local b "year trimester fico_2d"
		bys `b': egen gini=gini(int_rt)

		* M = cell mean of int_rt; dev1/dev2 = absolute deviation from M,
		* relative to the loan's own rate (dev1) or to M (dev2)
		cap drop M dev?
		bys `b': egen M=mean(int_rt)
		gen dev1=abs((int_rt-M)/int_rt)
		gen dev2=abs((int_rt-M)/M)

		* gini and M are constant within a cell, so their mean is the cell value
		collapse (min) fico_min=fico (max) fico_max=fico (p50) fico_p50=fico ///
			(mean) my=int_rt gini dev1 dev2 M  fico_mean=fico (sd) sd=int_rt (count) N=int_rt ///
			, by(`b')

		* coefficient of variation and Gini in percent; exact zeros become missing
		cap drop cv
		gen cv=(sd/my)*100
		replace gini=gini*100
		recode gini cv (0=.)

		label var my "Interest rates: Mean"
		label var cv "Interest rates: Coefficient of variation"
		label var sd "Interest rates: Standard deviation"
		label var gini "Interest rates: Gini"
		label var dev1 "abs((int_rt-M)/int_rt)"
		label var dev2 "abs((int_rt-M)/M)"

		* consecutive period counter, used as the panel time variable
		cap drop time
		egen time=group(year trimester), label

		xtset fico_2d time
		
		* value labels for the FICO bins: 62 -> "620-629", ..., 82 -> "820-829";
		* the top bin 83 is labelled "830-850"
		forvalues k=62/82 {
			label define fico_2d `k' "`k'0-`k'9", modify
		}
		label define fico_2d 83 "830-850", modify
		label val fico_2d fico_2d 

		note: Created on `= c(current_date)'
		note: C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do
		compress
		pwd
		* replace: without it a rerun stops with r(602) once the file exists
		saveold ./mortgages_usa/year_trimester_sample_fico2d.dta, replace version(12)

	***********************************
	// fico2d-biannual
	* Collapses the article sample to FICO-bin x half-year cells and saves
	* ./mortgages_usa/year_biannual_sample_fico2d.dta in Stata 12 format.
	***********************************

	cd D:\E\freddiemac\april2021\full\iversen-rehm_cps2021/
	use historical_data_all_sample.dta, clear
	cd C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\

	keep if year<=2019
	drop if fico_2d>=82 // groups are too small
	tab year

		* half of the year (1-2) from the calendar month
		cap drop biannual
		recode month (1/6=1) (7/12=2), gen(biannual)

		* dispersion measures at year-biannual-level
		* NOTE(review): gini() is not an official egen function; presumably it
		* comes from a user-written package (e.g. egen_inequal on SSC) -- confirm
		local b "year biannual fico_2d"
		bys `b': egen gini=gini(int_rt)

		* M = cell mean of int_rt; dev1/dev2 = absolute deviation from M,
		* relative to the loan's own rate (dev1) or to M (dev2)
		cap drop M dev?
		bys `b': egen M=mean(int_rt)
		gen dev1=abs((int_rt-M)/int_rt)
		gen dev2=abs((int_rt-M)/M)

		* gini and M are constant within a cell, so their mean is the cell value
		collapse (min) fico_min=fico (max) fico_max=fico (p50) fico_p50=fico ///
			(mean) my=int_rt gini dev1 dev2 M  fico_mean=fico (sd) sd=int_rt (count) N=int_rt ///
			, by(`b')

		* coefficient of variation and Gini in percent; exact zeros become missing
		cap drop cv
		gen cv=(sd/my)*100
		replace gini=gini*100
		recode gini cv (0=.)

		label var my "Interest rates: Mean"
		label var cv "Interest rates: Coefficient of variation"
		label var sd "Interest rates: Standard deviation"
		label var gini "Interest rates: Gini"
		label var dev1 "abs((int_rt-M)/int_rt)"
		label var dev2 "abs((int_rt-M)/M)"

		* consecutive period counter, used as the panel time variable
		cap drop time
		egen time=group(year biannual), label

		xtset fico_2d time
		
		* value labels for the FICO bins: 62 -> "620-629", ..., 82 -> "820-829";
		* the top bin 83 is labelled "830-850"
		forvalues k=62/82 {
			label define fico_2d `k' "`k'0-`k'9", modify
		}
		label define fico_2d 83 "830-850", modify
		label val fico_2d fico_2d 

		note: Created on `= c(current_date)'
		note: C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do
		compress
		pwd
		* replace: without it a rerun stops with r(602) once the file exists
		saveold ./mortgages_usa/year_biannual_sample_fico2d.dta, replace version(12)

	***********************************
	// fico2d-annual
	* Collapses the article sample to FICO-bin x year cells and saves
	* ./mortgages_usa/year_annual_sample_fico2d.dta in Stata 12 format.
	***********************************

	* The sample file is saved in the iversen-rehm_cps2021 subfolder, which is
	* where every other section loads it from; loading from the parent folder
	* would pick up a stale copy or fail with "file not found".
	cd D:\E\freddiemac\april2021\full\iversen-rehm_cps2021/
	use historical_data_all_sample.dta, clear
	cd C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\

	* p90/p10 interest-rate ratios, computed by hand from the tabstat output
	tabstat int_rt, by(year) s(p10 mean p90)
	di 8/6.625
	* p90/p10 ratio in 1999: 1.2075472
	di 3.875/2.75
	* p90/p10 ratio in 2020: 1.4090909

	keep if year<=2019
	drop if fico_2d>=82 // groups are too small
	tab year

		* constant period indicator (1 for every month), so cells are FICO bin x year
		cap drop annual
		recode month (1/12=1), gen(annual)

		* dispersion measures at year-annual-level
		* NOTE(review): gini() is not an official egen function; presumably it
		* comes from a user-written package (e.g. egen_inequal on SSC) -- confirm
		local b "year annual fico_2d"
		bys `b': egen gini=gini(int_rt)

		* M = cell mean of int_rt; dev1/dev2 = absolute deviation from M,
		* relative to the loan's own rate (dev1) or to M (dev2)
		cap drop M dev?
		bys `b': egen M=mean(int_rt)
		gen dev1=abs((int_rt-M)/int_rt)
		gen dev2=abs((int_rt-M)/M)

		* gini and M are constant within a cell, so their mean is the cell value
		collapse (min) fico_min=fico (max) fico_max=fico (p50) fico_p50=fico ///
			(mean) my=int_rt gini dev1 dev2 M  fico_mean=fico (sd) sd=int_rt (count) N=int_rt ///
			, by(`b')

		* coefficient of variation and Gini in percent; exact zeros become missing
		cap drop cv
		gen cv=(sd/my)*100
		replace gini=gini*100
		recode gini cv (0=.)

		label var my "Interest rates: Mean"
		label var cv "Interest rates: Coefficient of variation"
		label var sd "Interest rates: Standard deviation"
		label var gini "Interest rates: Gini"
		label var dev1 "abs((int_rt-M)/int_rt)"
		label var dev2 "abs((int_rt-M)/M)"

		* consecutive period counter, used as the panel time variable
		cap drop time
		egen time=group(year annual), label

		xtset fico_2d time
		
		* value labels for the FICO bins: 62 -> "620-629", ..., 82 -> "820-829";
		* the top bin 83 is labelled "830-850"
		forvalues k=62/82 {
			label define fico_2d `k' "`k'0-`k'9", modify
		}
		label define fico_2d 83 "830-850", modify
		label val fico_2d fico_2d 

		note: Created on `= c(current_date)'
		note: C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\figures_2_3_4_5_6\data_for_figures_2_3_4_5_6.do
		compress
		pwd
		* replace: without it a rerun stops with r(602) once the file exists
		saveold ./mortgages_usa/year_annual_sample_fico2d.dta, replace version(12)
